Take-Home Exercise 2

Published

December 9, 2023

Modified

December 16, 2023

Getting started

importing package to accommodate data wrangling and visualisation

pacman::p_load(sf, tmap, sfdep, tidyverse, stplanr, sp, reshape2, httr, performance)

Data Preparation

Importing several data for analysis

Bus Stop

importing Bus stop locations

busStops <- st_read(dsn = "data/geospatial",
                 layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex2/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(busStops) +
  tm_dots() +
tm_view(set.zoom.limits = c(11,14))
tmap_mode("plot")
tmap mode set to plotting

Creating Hexagonal Grid

creating hexagonal grid as the base of analysis. The grid size is 750m, the distance between parallel edges.

grid = st_make_grid(busStops, c(750), what = "polygons", square = FALSE)
# To sf and add grid ID
grid_sf = st_sf(grid) %>%
  # add grid ID
  mutate(grid_id = 1:length(lengths(grid)))
grid_sf$n_colli = lengths(st_intersects(grid_sf, busStops))

# remove grid without value of 0 (i.e. no points in side that grid)
grid_count = filter(grid_sf,n_colli > 0 )
tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(grid_count) +
  tm_fill(
    col = "n_colli",
    palette = "Blues",
    style = "cont",
    title = "Number of collisions",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of collisions: " = "n_colli"
    ),
    popup.format = list(
      n_colli = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
tm_view(set.zoom.limits = c(11,14))
tmap_mode("plot")
tmap mode set to plotting

removing bus stop outside of Singapore

grid_count_rm <- grid_count %>%
  filter(!grid_id == 942,
         !grid_id == 984,
         !grid_id == 819)

saving grid to RDS

write_rds(grid_count_rm, "data/rds/grid.rds")
rm(list = c('grid_count','grid_count_rm','grid_sf'))

Reload Grid

grid = read_rds('data/rds/grid.rds')

Trip Data

importing trip data for trips analysis

busTrips <- read_csv("data/aspatial/origin_destination_bus_202308.csv")
Rows: 5709512 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202309.csv")
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202310.csv")

busTrips$ORIGIN_PT_CODE <- as.factor(busTrips$ORIGIN_PT_CODE)
busTrips$DESTINATION_PT_CODE <- as.factor(busTrips$DESTINATION_PT_CODE)

MPSZ Data

importin mpsz data for visualisation

mpsz <- st_read(dsn = "data/geospatial",
                 layer = "MP14_SUBZONE_WEB_PL") %>%
  st_transform(crs = 3414)
Reading layer `MP14_SUBZONE_WEB_PL' from data source 
  `/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex2/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
tmap_mode('plot')
tmap mode set to plotting
tm_shape(mpsz) +
  tm_polygons(col='grey', border.alpha = 0.1) +
tm_shape(grid) +
  tm_fill(
    col = "n_colli",
    palette = "Blues",
    style = "cont",
    title = "Number of collisions",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of collisions: " = "n_colli"
    ),
    popup.format = list(
      n_colli = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.7)

Population data based on HDB

from HDB data, proxy population was counted from total dwelling units. This population will be used as propulsiveness data.

hdb <- read_csv('data/aspatial/hdb.csv')
New names:
Rows: 12442 Columns: 37
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(18): blk_no, street, residential, commercial, market_hawker, miscellane... dbl
(19): ...1, max_floor_lvl, year_completed, total_dwelling_units, 1room_s...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
hdb_sf <- hdb %>%
  rename(latitude = lat, 
         longitude = lng) %>%
  select(latitude, longitude, total_dwelling_units) %>%
  st_as_sf(coords = c('longitude','latitude'), 
           crs=4326) %>%
  st_transform(crs = 3414)

Business, Retail, and School Data

business and retail dataset will be used as attractiveness data.

business <- st_read(dsn = "data/geospatial",
                 layer = "Business") %>%
  st_transform(crs = 3414)
Reading layer `Business' from data source 
  `/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex2/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 6550 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3669.148 ymin: 25408.41 xmax: 47034.83 ymax: 50148.54
Projected CRS: SVY21 / Singapore TM
retail <- st_read(dsn = "data/geospatial",
                  layer = "Retails") %>%
  st_transform(crs = 3414)
Reading layer `Retails' from data source 
  `/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex2/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 37635 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 4737.982 ymin: 25171.88 xmax: 48265.04 ymax: 50135.28
Projected CRS: SVY21 / Singapore TM

the geospatial school data will be retrieve using the postal code. By geocoding using onemap API. The General information of school csv will contain the postal code and using the onemap API, it will retrieve and give us the longitude and latitude coordinate of the school. The schools postcode where the API could not retrieve the coordinate will be stored at not_found and later be filled manually.

url <- "https://www.onemap.gov.sg/api/common/elastic/search"

csv <- read_csv('data/aspatial/Generalinformationofschools.csv')
postcodes <- csv$postal_code

found <- data.frame()
not_found <- data.frame()

for(postcode in postcodes){
  query <- list('searchVal'=postcode, 'returnGeom'='Y','getAddrDetails'='Y','pageNum'='1')
  res <- GET(url,query=query)
  
  if((content(res)$found) != 0){
    found <- rbind(found,data.frame(content(res))[4:13])
  } else {
    not_found <- data.frame(postcode)
  }
}
merged <- merge(csv, found, by.x = 'postal_code', by.y = 'results.POSTAL', all = TRUE)
write.csv(merged, file = 'data/aspatial/schools.csv')
write.csv(not_found, file = 'data/aspatial/not_found.csv')
schools <- read_csv(file = 'data/aspatial/schools.csv')
New names:
Rows: 350 Columns: 41
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(35): school_name, url_address, address, telephone_no, telephone_no_2, f... dbl
(6): ...1, postal_code, results.X, results.Y, results.LATITUDE, results...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
schools <- schools %>%
  rename(latitude = results.LATITUDE, 
         longitude = results.LONGITUDE) %>%
  select(postal_code, school_name, latitude, longitude)

transforming the data from longitude and latitude data into geometry

schools_sf <- st_as_sf(schools, 
                       coords = c('longitude','latitude'), 
                       crs=4326) %>%
  st_transform(crs = 3414)

OD Flow

Trip count

calculating trip count of weekday morning

busTripsDayMorning <- busTrips %>%
  filter(DAY_TYPE == "WEEKDAY", 
         TIME_PER_HOUR >= 6, 
         TIME_PER_HOUR <= 9) %>%
  select(ORIGIN_PT_CODE,DESTINATION_PT_CODE,TOTAL_TRIPS) %>%
  rename(DESTIN_PT_CODE = DESTINATION_PT_CODE)

Creating the Flow Data

making object to represent bus stop location to a grid

busStops_grid <- st_intersection(busStops, grid) %>%
  select(BUS_STOP_N, grid_id) %>%
  st_drop_geometry()

we are going to change the trips identity from bus stops code to grid id

od_data <- left_join(busTripsDayMorning , busStops_grid,
            by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE,
         ORIGIN_GRID = grid_id,
         DESTIN_BS = DESTIN_PT_CODE)

removing duplicates

duplicate <- od_data %>%
  group_by_all() %>%
  filter(n()>1) %>%
  ungroup()
od_data <- unique(od_data)
od_data <- left_join(od_data , busStops_grid,
            by = c("DESTIN_BS" = "BUS_STOP_N")) 
duplicate <- od_data %>%
  group_by_all() %>%
  filter(n()>1) %>%
  ungroup()
od_data <- unique(od_data)

grouping the trips based on origin grid and destination grid

od_data <- od_data %>%
  rename(DESTIN_GRID = grid_id) %>%
  drop_na() %>%
  group_by(ORIGIN_GRID, DESTIN_GRID) %>%
  summarise(TRIPS = sum(TOTAL_TRIPS))
write_rds(od_data, "data/rds/od_data.rds")
od_data <- read_rds("data/rds/od_data.rds")

Creating the flow

first, we remove intra zonal flow

od_data1 <- od_data[od_data$ORIGIN_GRID!=od_data$DESTIN_GRID,]
flowLine <- od2line(flow = od_data1, 
                    zones = grid,
                    zone_code = "grid_id")
Creating centroids representing desire line start and end points.

Visualising O-D Flow

O-D Flow unfiltered

tm_shape(mpsz) +
  tm_polygons(col = 'grey', border.alpha = 0.1) +
tm_shape(grid) +
  tm_polygons() +
flowLine %>%
tm_shape() +
  tm_lines(lwd = "TRIPS",
           style = "quantile",
           scale = c(0.1, 1, 3, 5, 7, 10),
           n = 6,
           alpha = 0.3)

O-D Flow filtered for 10000 or more trips

tm_shape(mpsz) +
  tm_polygons(col = 'grey', border.alpha = 0.1) +
tm_shape(grid) +
  tm_polygons() +
flowLine %>%  
  filter(TRIPS >= 10000) %>%
tm_shape() +
  tm_lines(lwd = "TRIPS",
           style = "quantile",
           scale = c(0.1, 1, 3, 5, 7, 10),
           n = 6,
           alpha = 0.3)
Warning in g$scale * (x/maxW): longer object length is not a multiple of
shorter object length

O-D Flow filtered for 30000 or more trips

tm_shape(mpsz) +
  tm_polygons(col = 'grey', border.alpha = 0.1) +
tm_shape(grid) +
  tm_polygons() +
flowLine %>%  
  filter(TRIPS >= 30000) %>%
tm_shape() +
  tm_lines(lwd = "TRIPS",
           style = "quantile",
           scale = c(3, 5, 7, 10),
           n = 4,
           alpha = 0.3)

Spatial Interaction Model Data Preparation

Propulsiveness Data Wrangling

there will be 3 propulsiveness variable embedded into the origin grid, they are:

  1. Population per grid

    total number of dwelling will be a representation of the number of population in the grid

    grid_prop <- st_join(hdb_sf,grid, join = st_within) %>%
      select(total_dwelling_units, grid_id) %>%
      st_drop_geometry() %>%
      rename(POPULATION_COUNT = total_dwelling_units)
    grid_prop <- grid %>%
      left_join(grid_prop, by = c('grid_id' = 'grid_id')) 
    
    grid_prop$POPULATION_COUNT <- ifelse(
      is.na(grid_prop$POPULATION_COUNT),
      0.99, grid_prop$POPULATION_COUNT)
    
    grid_prop$POPULATION_COUNT <- ifelse(
      grid_prop$POPULATION_COUNT == 0,
      0.99, grid_prop$POPULATION_COUNT)
    
    grid_prop <- grid_prop %>%
      group_by(grid_id, n_colli) %>%
      summarise(POPULATION_COUNT = sum(POPULATION_COUNT))
  2. Number of HDB per grid

    Number of HDB per grid will be counted by using intersection between HDB point with the grid hexagonal polygon

    grid_prop$HDB_COUNT <- lengths (
      st_intersects(
        grid,hdb_sf))
    
    grid_prop$HDB_COUNT <- ifelse(
      grid_prop$HDB_COUNT == 0,
      0.99, grid_prop$HDB_COUNT)
  3. Number of Bus Station per grid

    Number of bus station per grid will be using the number of collision computed previously when previewing the hexagons

    grid_prop <- grid_prop %>%
      st_drop_geometry() %>%
      rename(BUS_N = n_colli)
    
    grid_prop$BUS_N <- ifelse(
      grid_prop$BUS_N == 0,
      0.99, grid_prop$BUS_N)
    write_rds(grid_prop,'data/rds/grid_prop.rds')
    grid_prop <- read_rds('data/rds/grid_prop.rds')

putting all the propulsiveness variable into flow data

flowLine <- flowLine %>%
left_join(grid_prop, by = c('ORIGIN_GRID' = 'grid_id'))
grid_plot <- grid %>%
  select(grid_id) %>%
  left_join(grid_prop)
Joining with `by = join_by(grid_id)`
tm_shape(mpsz) +
  tm_polygons(col = 'grey', border.alpha = 0.1) +
tm_shape(grid_plot) +
  tm_fill(
    col = "POPULATION_COUNT",
    palette = "Blues",
    style = "cont",
    title = "Number of Bus Station",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
flowLine %>%  
  filter(TRIPS >= 10000) %>%
tm_shape() +
  tm_lines(lwd = "TRIPS",
           style = "quantile",
           scale = c(0.1, 1, 3, 5, 7, 10),
           n = 6,
           alpha = 0.3) 
Warning in g$scale * (x/maxW): longer object length is not a multiple of
shorter object length

Attractiveness Data Wrangling

there will be 3 attractiveness variable embedded into the destination grid, they are:

  1. Number of School per grid

    Number of School per grid will be counted by using intersection between school point with the grid hexagonal polygon

    grid_att <- grid %>%
      select (-c(n_colli)) %>%
      st_drop_geometry()
    grid_att$SCHOOL_COUNT <- lengths(
      st_intersects(grid,schools_sf)
    )
    
    grid_att$SCHOOL_COUNT <- ifelse(
      grid_att$SCHOOL_COUNT == 0,
      0.99, grid_att$SCHOOL_COUNT)
  2. Number of Business per grid

    Number of Business per grid will be counted by using intersection between business point with the grid hexagonal polygon

    grid_att$BUSINESS_COUNT <- lengths(
      st_intersects(grid,business)
    )
    
    grid_att$BUSINESS_COUNT <- ifelse(
      grid_att$BUSINESS_COUNT == 0,
      0.99, grid_att$BUSINESS_COUNT)
  3. Number of Retail per grid

    Number of Retail per grid will be counted by using intersection between retail point with the grid hexagonal polygon

    grid_att$RETAIL_COUNT <- lengths(
      st_intersects(grid,retail)
    )
    
    grid_att$RETAIL_COUNT <- ifelse(
      grid_att$RETAIL_COUNT == 0,
      0.99, grid_att$RETAIL_COUNT
    )
    write_rds(grid_att, "data/rds/grid_att.rds") 
    grid_att <- read_rds('data/rds/grid_att.rds')

putting all the attractiveness variable into flow data

flowLine <- flowLine %>%
left_join(grid_att, by = c('DESTIN_GRID' = 'grid_id'))

Calculating the distance between grid

making the grids into spatial and contain column grid id and the geomtery

grid_sp <- grid %>%
  select (-c(n_colli)) %>%
  as('Spatial')
grid_sp

using spDists function of sp package to calculate the distance between polygon centroid creating a matrix

dist <- spDists(grid_sp, 
                longlat = FALSE)

head(dist, n=c(10, 10))

getting the grid_id to label the matrix column and row

grid_ids <- grid_sp$grid_id
colnames(dist) <- paste0(grid_ids)
rownames(dist) <- paste0(grid_ids)

using melt to transfrom the matrix into column based for easier join with the main flow data

distPair <- melt(dist) %>%
  rename(dist = value)
head(distPair, 10)

view the summary of distPair and then change the minimum value of 0 to 50 for the intra-zonal distance to prevent error in modelling.

distPair %>%
  filter(dist > 0) %>%
  summary()

distPair$dist <- ifelse(distPair$dist == 0,
                        50, distPair$dist)

distPair <- distPair %>%
  rename(orig = Var1,
         dest = Var2)
write_rds(distPair, "data/rds/distPair.rds") 
distPair <- read_rds('data/rds/distPair.rds')
summary(distPair)
      orig           dest           dist      
 Min.   :  23   Min.   :  23   Min.   :   50  
 1st Qu.: 871   1st Qu.: 871   1st Qu.: 8250  
 Median :1326   Median :1326   Median :13269  
 Mean   :1270   Mean   :1270   Mean   :14118  
 3rd Qu.:1689   3rd Qu.:1689   3rd Qu.:18929  
 Max.   :2505   Max.   :2505   Max.   :44680  

getting the distance column into flow data

flowLine <- flowLine %>%
left_join(distPair, by = c('DESTIN_GRID' = 'dest', 'ORIGIN_GRID' = 'orig'))

saving the flow data after combining all the needed variable

write_rds(flowLine, "data/rds/flowData.rds") 
flowData <- read_rds("data/rds/flowData.rds")

Building Spatial Interaction Models

Unconstrained Model

uncSIM <- glm(formula = TRIPS ~ 
                log(POPULATION_COUNT) +
                log(HDB_COUNT) +
                log(BUS_N) +
                log(BUSINESS_COUNT) +
                log(RETAIL_COUNT) +
                log(SCHOOL_COUNT) +
                log(dist),
              family = poisson(link = "log"),
              data = flowData,
              na.action = na.exclude)
summary(uncSIM)

Call:
glm(formula = TRIPS ~ log(POPULATION_COUNT) + log(HDB_COUNT) + 
    log(BUS_N) + log(BUSINESS_COUNT) + log(RETAIL_COUNT) + log(SCHOOL_COUNT) + 
    log(dist), family = poisson(link = "log"), data = flowData, 
    na.action = na.exclude)

Coefficients:
                        Estimate Std. Error z value Pr(>|z|)    
(Intercept)           15.2364998  0.0021250  7170.1   <2e-16 ***
log(POPULATION_COUNT) -0.1090088  0.0002404  -453.5   <2e-16 ***
log(HDB_COUNT)         0.5850673  0.0005034  1162.3   <2e-16 ***
log(BUS_N)             0.3287501  0.0005066   649.0   <2e-16 ***
log(BUSINESS_COUNT)   -0.0338775  0.0001751  -193.5   <2e-16 ***
log(RETAIL_COUNT)      0.2298522  0.0001363  1686.8   <2e-16 ***
log(SCHOOL_COUNT)      0.2507692  0.0006293   398.5   <2e-16 ***
log(dist)             -1.4478567  0.0002499 -5794.8   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 99139891  on 64943  degrees of freedom
Residual deviance: 49663447  on 64936  degrees of freedom
AIC: 50017525

Number of Fisher Scoring iterations: 6

Origin Constrained Model

orcSIM <- glm(formula = TRIPS ~ 
                ORIGIN_GRID +
                log(BUSINESS_COUNT) +
                log(RETAIL_COUNT) +
                log(SCHOOL_COUNT) +
                log(dist),
              family = poisson(link = "log"),
              data = flowData,
              na.action = na.exclude)
summary(orcSIM)

Call:
glm(formula = TRIPS ~ ORIGIN_GRID + log(BUSINESS_COUNT) + log(RETAIL_COUNT) + 
    log(SCHOOL_COUNT) + log(dist), family = poisson(link = "log"), 
    data = flowData, na.action = na.exclude)

Coefficients:
                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)          1.701e+01  1.975e-03  8613.7   <2e-16 ***
ORIGIN_GRID         -1.381e-04  4.681e-07  -294.9   <2e-16 ***
log(BUSINESS_COUNT) -1.043e-01  1.737e-04  -600.3   <2e-16 ***
log(RETAIL_COUNT)    2.484e-01  1.329e-04  1868.7   <2e-16 ***
log(SCHOOL_COUNT)    4.432e-01  6.227e-04   711.8   <2e-16 ***
log(dist)           -1.460e+00  2.527e-04 -5778.6   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 99139891  on 64943  degrees of freedom
Residual deviance: 60186383  on 64938  degrees of freedom
AIC: 60540458

Number of Fisher Scoring iterations: 7

Destination Constrained Model

decSIM <- glm(formula = TRIPS ~ 
                log(POPULATION_COUNT) +
                log(HDB_COUNT) +
                log(BUS_N) +
                DESTIN_GRID +
                log(dist),
              family = poisson(link = "log"),
              data = flowData,
              na.action = na.exclude)
summary(decSIM)

Call:
glm(formula = TRIPS ~ log(POPULATION_COUNT) + log(HDB_COUNT) + 
    log(BUS_N) + DESTIN_GRID + log(dist), family = poisson(link = "log"), 
    data = flowData, na.action = na.exclude)

Coefficients:
                        Estimate Std. Error z value Pr(>|z|)    
(Intercept)            1.589e+01  2.135e-03  7444.0   <2e-16 ***
log(POPULATION_COUNT) -4.568e-02  2.361e-04  -193.5   <2e-16 ***
log(HDB_COUNT)         4.725e-01  4.932e-04   958.0   <2e-16 ***
log(BUS_N)             3.255e-01  5.064e-04   642.7   <2e-16 ***
DESTIN_GRID           -1.300e-04  4.463e-07  -291.3   <2e-16 ***
log(dist)             -1.421e+00  2.511e-04 -5657.3   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 99139891  on 64943  degrees of freedom
Residual deviance: 52993882  on 64938  degrees of freedom
AIC: 53347956

Number of Fisher Scoring iterations: 6

Doubly Constrained Model

dbcSIM <- glm(formula = TRIPS ~ 
                ORIGIN_GRID + 
                DESTIN_GRID + 
                log(dist),
              family = poisson(link = "log"),
              data = flowData,
              na.action = na.exclude)
summary(dbcSIM)

Call:
glm(formula = TRIPS ~ ORIGIN_GRID + DESTIN_GRID + log(dist), 
    family = poisson(link = "log"), data = flowData, na.action = na.exclude)

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)  1.715e+01  1.952e-03  8789.5   <2e-16 ***
ORIGIN_GRID -2.353e-04  1.299e-06  -181.1   <2e-16 ***
DESTIN_GRID  3.064e-04  1.297e-06   236.2   <2e-16 ***
log(dist)   -1.415e+00  2.522e-04 -5610.9   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 99139891  on 64943  degrees of freedom
Residual deviance: 64465365  on 64940  degrees of freedom
AIC: 64819435

Number of Fisher Scoring iterations: 7

Model Comparison

RMSE

model_list <- list(unconstrained=uncSIM,
                   originConstrained=orcSIM,
                   destinationConstrained=decSIM,
                   doublyConstrained=dbcSIM)
compare_performance(model_list,
                    metrics = "RMSE")
# Comparison of Model Performance Indices

Name                   | Model |     RMSE
-----------------------------------------
unconstrained          |   glm | 1605.070
originConstrained      |   glm | 1690.365
destinationConstrained |   glm | 1656.000
doublyConstrained      |   glm | 1736.604

R-Squared

CalcRSquared <- function(observed,estimated){
  r <- cor(observed,estimated)
  R2 <- r^2
  R2
}
CalcRSquared(uncSIM$data$TRIPS, uncSIM$fitted.values)
[1] 0.2261695
CalcRSquared(orcSIM$data$TRIPS, orcSIM$fitted.values)
[1] 0.1414428
CalcRSquared(decSIM$data$TRIPS, decSIM$fitted.values)
[1] 0.1756123
CalcRSquared(dbcSIM$data$TRIPS, dbcSIM$fitted.values)
[1] 0.09347343